home *** CD-ROM | disk | FTP | other *** search
- .MODEL small
-
- COMMENT #
-
- "AP-SLIDER" (LZW) DYNAMIC DICTIONARY COMPRESSION
- uses 13 bit pointers and
- 8k dictionary
- w/update
- (oldest leaf is deleted)
- (one-at-a-time)
- Memory needed: 100k
-
- Written in MASM 6.0
- Miles Pawski, 1992
-
- THIS PROGRAM IS FREE
-
- To assemble & link under MASM 6.0 (use ML.EXE): ML kboom11.asm
-
- Requirements: 8086+
-
- Please note: Stack frames are generated automatically by
- procedures called by the INVOKE directive.
- The stack is automatically cleaned up by the
- RET instruction. Local variables are automatically
- referenced and coded. You can eliminate the INVOKE
- directive by passing parameters in variables or
- registers. The USES list automatically pushes (on entry)
- and pops (on ret) the indicated registers. You can
- eliminate this with the appropriate code within the
- procedure.
-
- .WHILE .IF .ELSEIF .ELSE, etc. are automatic generations
- of the cmp/jne/jmp... routines. An .IF .ENDIF block
- is basically a cmp/jne routine. Adding an .ELSEIF
- puts in a cmp/jump-around, etc.
-
-
- .STARTUP generates the following code (if default near-
- stack is used - this places the stack in DGROUP so ss:sp
- needs to be adjusted). Only the first two lines are
- required if using using FARSTACK (stack in its own
- segment). FARSTACK can be specified by indicating
- it in the .MODEL directive: .MODEL small,farstack
-
- mov dx,DGROUP
- mov ds,dx
- mov bx,ss
- sub bx,dx
- shl bx,1
- shl bx,1
- shl bx,1
- shl bx,1
- cli
- mov ss,dx
- add sp,bx
- sti
-
-
- .EXIT generates terminate program code using int 21h,
- function 04ch.
-
- The ASSUME assembler directive was used to automatically
- provide various segment over-rides for various array
- variables. For examble, ASSUME es:seg parfield generates
- this code: es:parfield.
-
- THIS PROGRAM is based on James Storer's 'squeeze' source
- code found in his book DATA COMPRESSION, METHODS AND
- THEORY, 1988 - Computer Science Press. Modification
- and methods were based on experimentation, general
- tree theory of data structures and ideas put forth in
- Mr. Storer's book. Mr. Storer's original source code
- is in PASCAL. This program will hopefully provide a
- better understanding of the LZW method and complex,
- linked tree structures and there use.
-
- #
-
-
- ;---------------------------MACROS---------------------------------------
-
- child macro mainptr,char
- xor ax,ax ;;returns pointer value in ax
- mov cx,char
- mov bx,mainptr
- shl bx,1 ;;make mainptr word indexer
- mov ax,lcfield[bx] ;;get child from lcfield
- mov bx,ax ;;put in bx to index
- shl bx,1 ;;make word ptr
- .WHILE ax != NILPTR && cx != ctrfield[bx]
- mov ax,rsibfield[bx] ;;cx=indexed char
- mov bx,ax ;;if first child <> NILPTR and
- shl bx,1 ;;char <> indexed char then walk the
- .ENDW ;;linked list of siblings of child
- endm ;;until we find a match
- ;;no match returns NILPTR
-
- parent macro mainptr
- xor ax,ax ;returns pointer value in ax
- mov bx,mainptr
- shl bx,1
- mov ax,es:parfield[bx]
- endm
-
- ctr1 macro mainptr
- xor ax,ax ;returns pointer value in ax
- mov bx,mainptr
- shl bx,1
- mov ax,ctrfield[bx]
- endm
-
- print macro char
- mov ah,02h
- forc arg1,<char>
- mov dl,"&arg1"
- int 21h ;;print string to screen: print HELLO
- endm
- endm
-
- close macro handle
- mov ah,3eh
- mov bx,handle ;;file closure
- int 21h
- endm
-
- writestring macro string,length
- mov ah,40h
- mov bx,1
- mov cx,length ;;write string to screen
- lea dx,string
- int 21h
- endm
-
- writefile macro var,handle,len
- mov ah,40h
- mov bx,handle ;;write to file handle from variable
- mov cx,len
- lea dx,var
- int 21h
- endm
-
- readfile macro var,handle,len
- mov ah,3fh
- mov bx,handle ;;read from file handle into variable
- mov cx,len
- lea dx,var
- int 21h
- endm
-
- ;------------------------------------------------------------------------
- ; ADD_LEAF macro. Adds a leaf to the tree.
- ; Input: Parent node, node (code) to be added, character (mval)
- ; ** See James Storer's DATA COMPRESSION, METHODS AND THEORY
- ; for more explanations,theory and original PASCAL code.
- ;------------------------------------------------------------------------
-
- add_leaf macro
- .IF prevptr != NILPTR
- push dx
- push si
- push di
- mov bx,trieptr ;new code
- mov dx,bx ;save a copy of new code in dx
- mov ax,mval ;match character
- mov di,prevptr ;parent node in di
- shl bx,1
- mov ctrfield[bx],ax ;ctrfield(mainptr)=char
- mov parfield[bx],di ;parfield(mainptr)=parptr
- mov lcfield[bx],NILPTR ;lcfield(mainptr)=NILPTR
- mov lsibfield[bx],NILPTR
- shl di,1 ;make parptr word indexer
- mov si,lcfield[di] ;si=lcfield(parptr)
- mov rsibfield[bx],si ;rsibfield(mainptr)=lcfield(parptr)
- .IF si != NILPTR ;si=lcfield(parptr)
- shl si,1
- mov lsibfield[si],dx ;lsibfield(lcfield(parptr))=mainptr
- .ENDIF
- mov lcfield[di],dx ;lcfield(parptr)=ptr
- pop di
- pop si
- pop dx
- .ENDIF
- endm
-
- ;----------------------------------------------------------------------
- ; DELETE_LEAF macro. Deletes a leaf from the tree.
- ; Input: Node (code) to be deleted. Passed in variable (trieptr)
- ; If node has siblings, they will be connected to each other properly
- ; after the node is deleted.
- ; ** See James Storer's DATA COMPRESSION, METHODS AND THEORY
- ; for more explanations,theory and original PASCAL code
- ;----------------------------------------------------------------------
-
- delete_leaf macro
- push si
- push di
- mov bx,trieptr ;bx=code to be deleted
- shl bx,1
- mov ax,rsibfield[bx] ;ax=rsibfield(mainptr)
- .IF lsibfield[bx] != NILPTR
- mov di,lsibfield[bx] ;connect left sibling and right sibling
- shl di,1 ;through rsibfield of left sibling (huh?)
- mov rsibfield[di],ax ;rsibfield[lsibfield(mainptr))=
- .ELSE ; rsibfield(mainptr)
- mov di,parfield[bx] ;di=parfield(mainptr)
- shl di,1
- mov lcfield[di],ax ;lcfield(parfield(mainptr))=
- .ENDIF ; rsibfield(mainptr)
- .IF ax != NILPTR
- mov di,lsibfield[bx] ;di=lsibfield(mainptr)
- mov si,ax ;make left sibling of deleted leaf the
- shl si,1 ;left sibling of deleted leaf's right sib.
- mov lsibfield[si],di ;lsibfield[rsibfield(mainptr))=
- .ENDIF ; lsibfield(mainptr)
- pop di
- pop si
- endm
-
- ;--------------------------------------------------------------------------
-
- ;MASM 6.0 is 1-pass assembly. Procedures called with the INVOKE
- ;directive that are physically located AFTER the call in the source
- ;code must be declared with PROTO. Far procedures must be declared
- ;also although this code has none.
-
-
- cmdline PROTO near C,arg1:WORD,arg2:WORD,arg3:WORD
- caps PROTO near C,arg1:WORD,arg2:WORD
-
-
- .STACK
-
-
-
- .DATA
-
- dictsize WORD 00h
- curptr WORD 00h
- curlen WORD 00h
- prevptr WORD 00h
- trieptr WORD 00h
- mval WORD 00h
- mindex WORD 00h
- prevlen WORD 00h
- mstart WORD 00h
- outcount WORD 00h
- incount WORD 00h
- bytes_read WORD ?
- eight_count BYTE 00h
- sixteen_count BYTE 16
- all_over BYTE 00h
- bigplace WORD 256
-
- filename BYTE 50 dup (00)
- tempfile BYTE 50 dup (00)
- parabuf BYTE 50 dup (00)
- paraflag BYTE 00h
- compflag BYTE 00h
- decompflag BYTE 00h
- file_handle WORD ?
- file_handle2 WORD ?
- inputbyte WORD 00h
- end_flag BYTE 00h
- init_size DWORD ?
- master_size DWORD 00h
- filelen WORD ?
- filelen2 WORD ?
- comp_size DWORD ?
- dictflag BYTE 00h
- incflag BYTE 00h
- work_code BYTE 0,0,0,0
- ascii_string BYTE 10 dup (00h)
- count WORD 00h
- xdate WORD ?
- xtime WORD ?
- nologo_flag BYTE 00h
- comp_flag BYTE 00h
- decomp_flag BYTE 00h
-
-
-
- comp BYTE "Compressing: "
- spc BYTE 70 dup (20h)
- fsize BYTE "Comp. size: "
- isize BYTE "Orig. size: "
- ratio BYTE "Ratio: "
- perc BYTE "%"
- crlf BYTE 13,10
- expand BYTE "Expanding: "
- arrow BYTE " --> "
- fnl BYTE "Exp. Size: "
- kboom_code BYTE "¿MP¿"
- sorry0 BYTE "Sorry..."
- sorry1 BYTE " was not compressed by Kboom11"
-
- logo BYTE 13,10," Kboom11 LZW File Encoding/Decoding Utility featuring",13,10,
- " 8k Dynamic Dictionary w/discrete update",13,10,
- " Miles Pawski CIS: 70473,527 1992",13,10,13,10
-
-
- format BYTE "FORMAT: kboom11 [input file] [output file] [/c][/d][/n]",13,10,
- " /c - Compression /d - Decompression /n - No logo"
-
-
- ;Assembly-time constants
-
- MAXINCREMENT EQU 4 ;AP - All prefixes
- MAXMATCH EQU 100 ;max string match=100
- STATICSIZE EQU 256 ;constant dictionary gaurantees first match
- DICTIONARY EQU 8192
- MAXPTR EQU DICTIONARY-2 ;highest pointer
- NILPTR EQU DICTIONARY-1 ;end code - root code
-
- ;For experimentation. If the DICTIONARY size is changed, this
- ;text sequence automatically changes CODESIZE and BITSHIFT to fit
- ;the DICTIONARY. CODESIZE and BITSHIFT are used in the code to
- ;read and write pointers from and to the bit stream.
- ;Use only the below DICTIONARY values.
-
- IF DICTIONARY EQ 512
- CODESIZE EQU 9
- BITSHIFT EQU 7
- ELSEIF DICTIONARY EQ 1024
- CODESIZE EQU 10
- BITSHIFT EQU 6
- ELSEIF DICTIONARY EQ 2048
- CODESIZE EQU 11
- BITSHIFT EQU 5
- ELSEIF DICTIONARY EQ 4096
- CODESIZE EQU 12
- BITSHIFT EQU 4
- ELSEIF DICTIONARY EQ 8192
- CODESIZE EQU 13
- BITSHIFT EQU 3
- ENDIF
-
- ; I split the array fields between the near and far BSS (un-
- ; initialized data) segments. I couldn't get all the variables
- ; and arrays into one data segment, especially when using an 8k
- ; dictionary.
-
- .DATA? ;DATA? is a default BSS segment in DGROUP
-
- ;-------------------------------------------
- fielder label WORD
-
- ctrfield WORD DICTIONARY dup (?)
- lcfield WORD DICTIONARY dup (?)
- ;-------------------------------------------
-
- match BYTE 100 dup (?),?
-
-
-
- OUTSEG SEGMENT PARA PRIVATE 'FAR_BSS'
-
- ;----------------------------------------
- fielder_two label WORD
-
- parfield WORD DICTIONARY dup (?)
- rsibfield WORD DICTIONARY dup (?)
- lsibfield WORD DICTIONARY dup (?)
- ;----------------------------------------
-
- outbuffer BYTE 8000 dup (?) ;file i/o buffers
- inbuffer BYTE 8000 dup (?)
- OUTSEG ENDS
-
-
- .CODE
-
- .STARTUP
-
- ;-----------------------------MAIN PROGRAM------------------------------
-
- invoke cmdline, ADDR filename,ADDR tempfile,ADDR parabuf
- cmp filelen,00h ;if source filename length=0,
- je form ;quit with FORMAT display
- cmp paraflag,01h ;if no parameters, quit with
- jne form ;FORMAT display
-
- call initialize ;set variables
- mov di,seg outbuffer
- mov es,di ;set es=far (output) segment
-
- ASSUME es:seg parfield,es:seg lsibfield,es:seg rsibfield
- ;ASSUME usage:
- ;sets assembler for automatic segment
- ;over-rides for the listed variables
-
- invoke caps,ADDR parabuf,50 ;capitalize parameters
-
- mov di, offset parabuf ;read parameters
- mov cx,50
- @@:
- mov ax,word ptr [di]
- .IF ax=="N/"
- mov nologo_flag,01h
- .ELSEIF ax=="C/"
- mov comp_flag,01h
- .ELSEIF ax=="D/"
- mov decomp_flag,01h
- .ENDIF
- inc di
- loop @B
-
- .IF comp_flag == 01h
- call compression
- jmp final_exit
- .ELSEIF decomp_flag == 01h
- call decompression
- jmp final_exit
- .ENDIF
-
- form:
- writestring format,SIZEOF format
-
- final_exit:
-
- .EXIT
-
- ;-------------------------------------------------------------------------
-
-
-
- ;╔══════════════════════════════════════════════════════════════════════╗
- ;║ ║
- ;║ ║
- ;║ PROCEDURE SECTION ║
- ;║ ║
- ;║ ║
- ;╚══════════════════════════════════════════════════════════════════════╝
-
- ;------------------------------------------------------------------------
- ; |
- ; MAIN COMPRESSION SEQUENCE |
- ; |
- ;------------------------------------------------------------------------
-
- compression proc near PRIVATE
-
- call open_source_file ;open source file
- .IF CARRY?
- jmp final
- .ENDIF
-
- push es
- mov ah,4eh ;function 4eh
- mov cx,10h ;file attribute
- mov dx,offset filename ;point to filespec
- int 21h ;"first match" interrupt
-
- mov ah,2fh
- int 21h ;get dta
- mov ax,word ptr es:[bx+1ah] ;get file size from dta
- mov dx,word ptr es:[bx+1ah][2]
- mov word ptr [init_size][0],ax ;record in variable
- mov word ptr [init_size][2],dx
- mov ax,word ptr es:[bx+16h] ;get file time
- mov xtime,ax
- mov ax,word ptr es:[bx+18h] ;get file date
- mov xdate,ax
- pop es
-
-
- call open_second_file ;open target file
- .IF CARRY?
- jmp final
- .ENDIF
-
- .IF nologo_flag != 01h
- writestring logo,SIZEOF logo
- .ENDIF
-
- writestring comp,SIZEOF comp ;"Compressing: "
- writestring filename,filelen ; [filename]
-
- writefile kboom_code,file_handle2,4 ;write kboom code to file in
- ;first 4 bytes so we won't try to
- ;expand any files not generated by
- ;this program: ¿MP¿
-
- writefile init_size,file_handle2,4 ;write next 4 bytes to file
- ;which represent what file size
- ;should be when expansion is
- ;finished
-
- writefile xdate,file_handle2,2 ;write date to file
- writefile xtime,file_handle2,2 ;write time to file
-
- mov di,offset outbuffer ;set output buffer. es already set
-
- call read_file ;read file into buffer
- mov al, es:[si] ;get first byte from buffer
- inc si
- inc incount
- xor ah,ah ;convert to word
- mov inputbyte,ax ;move byte into variable
-
-
- ;--------- MAIN COMPRESSION LOOP ----------
-
- .WHILE 1 ;loop indefinitely
-
- mov ax,inputbyte ;same as: child NILPTR, inputbyte
- mov trieptr,ax ;first ptr is always character and
- ;gets us into the root level
-
- mov ax,curptr
- mov prevptr,ax ;prevptr=curptr
- mov ax,curlen
- mov prevlen,ax ;prevlen=curlen
- mov curlen,0000h
-
- .WHILE trieptr != NILPTR ;first trieptr is never NILPTR
-
- mov ax,trieptr ;curptr=trieptr
- mov curptr,ax
- mov bx,curlen
- mov ax,inputbyte
- mov match[bx],al ;record byte in array starting
- inc curlen ;at postion 00. Curlen indexes in.
- ;This is used by update process.
- ;First byte in loop is always a match
-
- mov ax,incount
- cmp ax,bytes_read ;see if we'er at end of input buffer
- jae read_leave ;if not, keep reading
-
- goon:
- mov al,es:[si] ;get new byte
- inc si
- inc incount
- xor ah,ah
- mov inputbyte,ax ;place in variable
- child curptr,inputbyte ;see if new byte is child of curptr
- mov trieptr,ax ;result in ax -> trieptr
- ;keep walking down the tree if
- .ENDW ;we have a match (trieptr <> NILPTR)
-
- leaving:
- mov mstart,00h
- mov bx,curptr ;mov curptr in bx
-
- xor ax,ax
- mov al,sixteen_count ;get number of bit positions left
-
- repeat BITSHIFT
- shl bx,1 ;CODESIZE bit pointer will now start
- endm ;at left end of word. We want to
- ;output curptr into bit stream.
- xor cx,cx
- bigloop:
- cmp cx,CODESIZE ;check code bit counter
- je outbig ;done? then leave. Shift both in
- shl bx,1 ;unison. dx holds bits.
- rcl dx,1 ;carry added to dl if set by bx.
- inc cx ;We are copying bx to dx bit by
- dec ax ;bit. ax=dx bit counter
- jnz bigloop
-
- ;---------------- JUMP-OUTS & FALL-THROUGHS ----------------------
- ; conditional branches ;
- ; kept out of main loops for speed ;
- ; ;
- mov ax,dx ;
- stosw ;store word in buffer ;
- xor dx,dx ;clear out dl ;
- mov ax,16 ;reset output bit counter ;
- inc outcount ;record byte output ;
- inc outcount ;
- cmp outcount,8000 ;see if we're at end of output ;
- jae next6 ;buffer. If not, go on. ;
- ;if at end, flush the buffer ;
- next5: ;
- cmp cx,CODESIZE-8 ;if we have enough bits left ;
- ja bigloop ;and dx is clear, then we can ;
- mov dl,bh ;move a whole byte (bh) at once. ;
- mov bh,bl ;move bl over to left of word ;
- mov ax,8 ;adjust output bit counter ;
- add cx,8 ;adjust bit counter for CODESIZE ;
- jmp bigloop ;
- ;
- next6: ;
- call write_file ;flush buffer to file ;
- jmp next5 ;continue ;
- ;
- read_leave: ;
- .IF end_flag==01h ;check for eof. End_flag set if eof.;
- mov all_over,01h ;set another flag so we can process ;
- jmp leaving ;the last byte ;
- .ENDIF ;
- call read_file ;read more from file into buffer ;
- jmp goon ;
- ; ;
- ;-----------------------------------------------------------------
-
-
- outbig:
- mov sixteen_count,al ;save bit count
- cmp all_over,01h ;last byte? then leave
- je endup
-
- call update ;update the tree
-
- .ENDW
-
- ;-------------- END MAIN COMPRESSION LOOP ------------
-
- endup:
-
- .IF sixteen_count > 00 ;see if we have leftover in dx
- mov cl,sixteen_count ;if so, shift them all to the
- shl dx,cl ;left so we don't mess up the
- mov ax,dx ;bit stream.
- stosw
- inc outcount ;record final bytes
- inc outcount
- .ENDIF
-
- call write_file ;flush the buffer
-
-
- ;--------- INFO/DECORATION ---------------
-
- writestring crlf,2
-
- writestring isize,SIZEOF isize ;"Orig. Size: "
- mov ax,word ptr init_size[0]
- mov dx,word ptr init_size[2]
- call convert ;Convert to decimal string.
- writestring crlf,2 ;value is passed to process in
- ;dx:ax pair
- writestring fsize,SIZEOF fsize
- mov ax,word ptr master_size[0]
- mov dx,word ptr master_size[2]
- call convert
- ;"Comp. size; "
- writestring crlf,2 ; carriage return-line feed
- writestring ratio,SIZEOF ratio ;"Ratio: "
-
- ;Ratio is not % saved but, rather
- ;the new file length divided by the old.
- ;However, to achieve this, I multiply the
- ;new file length by 100 first, then divide
- ;it by the old length.
- xor bx,bx
- mov bx,100
- xor dx,dx ;file length.
- mov ax,word ptr master_size[0] ;get low word
- mul bx ;multiply by 100
- mov cx,dx ;save dx in cx
- mov si,ax ;save low word in si
- xor dx,dx
- mov ax,word ptr master_size[2] ;get high word dx ax
- mul bx ;multiply by 100 bx
- add dx,cx ;add saved high word ---------
- add dx,ax ;add high to high (bx*ax) cx si
- mov ax,si ;get back low (bx*dx) dx ax
- ; -------------
- mov bx,word ptr init_size[0] ;Oh well, -> dx+ax+cx,si
- mov cx,word ptr init_size[2]
-
- ;dx:ax now has the new file size multiplied by 100
- ;cx:bx has original file size
-
-
- ;---------DIVISION ROUTINE---------
-
- div_num_1:
- or cx,cx ; Divide both numbers by 2
- je div_num_2 ; until high word of
- shr dx,1 ; divisor (cx) is zero.
- rcr ax,1 ; if cf is set by dx shift, the bit
- ; will be added to the left of ax during
- shr cx,1 ; ax rcl
- rcr bx,1
- jmp div_num_1
- div_num_2:
- push ax ;Save low word
- mov ax,dx
- xor dx,dx
- div bx ;Divide high word
- mov cx,ax ;Save high quotent
- pop ax
- add ax,dx ;add remainder to low word
- div bx ;Divide low word
- mov si,dx ;save remainder in si
- mov dx,cx ;386 code is faster and easier and I would
- ;much rather use it than going through all
- ;this BS. 32-bit registers FOREVER!
-
- push dx ;save high word
- push ax ;save low word
- xor dx,dx ;zero dx
- mov ax,si ;get back last remainder
- mov cx,10 ;multiply by 10
- mul cx
-
- div bx ;divide by divisor again
- mov si,ax ;save last quotient in si
- ;this will be first digit after decimal
-
- mov ax,dx ;put latest remainder into ax
- xor dx,dx ;zero dx
- mul cx ;multiply by 10
- div bx ;divide by divisor
- ;this will be second digit after decimal
- mov bx,ax ;save quotient in bx
-
-
- ;------- END DIVISION ROUTINES ----------
-
-
- pop ax ;get back low word
- pop dx ;get back high word
-
- push bx ;save second decimal digit
- push si ;save first decimal digit
- call convert ;write main ratio
-
- mov ah,02h
- mov dl,"." ;write decimal point "."
- int 21h
- pop si ;get back first decimal digit
- pop bx ;get back second decimal digit
-
- mov dx,si ;put first digit into dx
- xor dh,dh
- add dl,48 ;convert to ascii
- int 21h ;write first digit after decimal to screen
-
- mov dl,bl ;put second digit into dl
- add dl,48 ;convert to ascii
- int 21h ;write second digit after decimal to screen
-
- mov dl,"%" ;"%"
- int 21h
-
-
- ;---------- END INFO DECORATION -----------
-
- final:
-
- ret
- compression endp
-
-
- ;------------------------------------------------------------------------
- ; |
- ; MAIN DECOMPRESSION SEQUENCE |
- ; |
- ;------------------------------------------------------------------------
-
-
- decompression proc near PRIVATE
-
- call open_source_file ;open source file
- .IF CARRY?
- jmp final
- .ENDIF
-
- push es
- mov ah,4eh ;function 4eh
- mov cx,10h ;file attribute
- mov dx,offset filename ;point to filespec
- int 21h ;"first match" interrupt
-
- mov ah,2fh
- int 21h ;get dta
- mov ax,word ptr es:[bx+1ah] ;get file size from dta
- mov dx,word ptr es:[bx+1ah][2]
- sub ax,12 ;subtract 12 bytes because file size
- sbb dx,0000h ;and code are contained in these bytes
- mov word ptr [comp_size][0],ax ;record compressed file size (from dta)
- mov word ptr [comp_size][2],dx ;date and time also take 4 bytes
- pop es
-
- readfile work_code,file_handle,4 ;first four bytes are compression code
-
- mov si,offset work_code ;check to see if code is ¿MP¿
- mov ax, word ptr [si] ;check first pair
- cmp ax,"M¿"
- jne @F
- add si,2
- mov ax,word ptr [si] ;if ok, check second pair
- cmp ax,"¿P"
- je code_ok
- @@:
- writestring sorry0,SIZEOF sorry0
- writestring filename,filelen
- writestring sorry1,SIZEOF sorry1
- jmp final
- code_ok:
-
- call open_second_file ;open target file
- .IF CARRY?
- jmp final
- .ENDIF
-
- .IF nologo_flag != 01h
- writestring logo,SIZEOF logo
- .ENDIF
-
- writestring expand,SIZEOF expand ;"Expanding [] --> []
- writestring filename,filelen
- writestring arrow,SIZEOF arrow
- writestring tempfile,filelen2
- writestring crlf,2
-
- readfile init_size,file_handle,4 ;second four bytes are file size
- readfile xdate,file_handle,2 ;read file date into variable
- readfile xtime,file_handle,2 ;read file time into variable
-
- mov di,offset outbuffer
-
- call read_file ;read file into buffer
- mov dx,word ptr es:[si] ;load dx with first word
- inc si
- inc si
- inc incount
- inc incount
-
- ;--------------------MAIN DECOMPRESSION ROUTINE---------------------
-
- .WHILE 1 ;loop indefinitely
-
- xor ax,ax
- mov al,sixteen_count ;get saved bit count
- xor cx,cx ;We need to read CODESIZE bits from input stream.
- xor bx,bx ;We read a bit at a time from dx into bx. The
- bigloop: ;leftover is left in dx for next time.
- cmp cx,CODESIZE ;see if CODESIZE bits have been read
- je outbig ;if not,read more bits
- ;if so, we have full CODESIZE bit pointer. Leave.
- shl dx,1
- rcl bx,1 ;bx will have pointer. Shift in unison with
- inc cx ;dx. Ax keeps bit count ofn dx. If dx is empty,
- dec ax ;fall through and fill dx again.
- jnz bigloop
-
-
- ;-------------------JUMP-OUTS & FALL-THROUGHS ----------------------------
- ; ;
- mov ax,incount ;check byte count ;
- cmp ax,bytes_read ;see if all bytes have been processed from buffer
- jae check_end ;if not, proceed ;
- next0: ;
- mov dx, word ptr es:[si] ;put new word into dl ;
- inc incount ;count it ;
- inc incount ;
- inc si ;
- inc si ;
- mov ax,16 ;reset bit count ;
- jmp bigloop ;
- ;
- check_end: ;jump-out from above loop ;
- .IF end_flag==01h ;
- mov all_over,01h ;
- jmp outbig ;
- .ENDIF ;
- call read_file ;read more of file into buffer ;
- jmp next0 ;
- ; ;
- ;-----------------------------------------------------------------------;
-
-
- outbig:
- mov sixteen_count,al ;save bit count
- mov trieptr,bx ;put completed pointer into trieptr
- ;GOT POINTER, NOW START
- mov ax,curptr
- mov prevptr,ax ;prevptr=curptr
- mov ax,curlen
- mov prevlen,ax ;prevlen=curlen
- mov ax,trieptr
- mov curptr,ax ;curptr=trieptr
- mov curlen,0000h
-
-
- .WHILE trieptr != NILPTR
- ctr1 trieptr ;get character for trieptr (ret in ax)
- mov bx,MAXMATCH
- sub bx,curlen ;index to end of match array. Place
- mov match[bx],al ;character there. Match array is
- inc curlen ;written back to front.
- parent trieptr ;get parent of trieptr (ret in ax)
- mov trieptr,ax ;Move into trieptr. Walk up the tree
- .ENDW ;reading the nodes until we reach the
- ;root (NILPTR).
-
- mov ax,MAXMATCH ;curlen has length of string
- sub ax,curlen ;move back in match array to where
- inc ax ;string starts.
- mov mstart,ax ;move index into mstart. This is used
- xor bx,bx ;by update routine.
- mov bx,ax ;index into array
- .WHILE bx <= MAXMATCH ;max match length is 100 letters
- mov al,match[bx] ;write the string in the match array
- stosb ;to the output buffer.
- inc outcount
- cmp outcount,8000 ;buffer holds 8000. When full,
- jae chkout ;flush buffer. Do this outside of
- chkout_ret: ;loop. Hate to put jump-arounds
- inc bx ;inside loop. Fall-through is faster
- .ENDW
- cmp all_over,01h
- je endup
-
- call update
-
- .ENDW
-
- chkout:
- call write_file ;jumpout from above loop
- jmp chkout_ret
-
-
- endup:
-
- call adjust
- call write_file
-
- writestring fsize,SIZEOF fsize
- mov ax,word ptr comp_size[0] ;"Init. Size: "
- mov dx,word ptr comp_size[2]
- call convert
-
- writestring crlf,2
- writestring fnl,SIZEOF fnl ;"Final Size: "
- mov ax,word ptr master_size[0]
- mov dx,word ptr master_size[2]
- call convert
-
-
- mov ah,57h
- mov al,01h
- mov bx,file_handle2 ;set date and time
- mov cx,xtime
- mov dx,xdate
- int 21h
-
- final:
-
- ret
- decompression endp
-
-
- ;----------------------------------------------------------------------
- ; INITIALIZE procedure. Initializes variables and the first 256 nodes
- ; of the tree. The first 256 nodes represent the first level of the
- ; tree and they will never be deleted in this particular data structure.
- ;----------------------------------------------------------------------
-
- initialize proc near
-
- mov di,ds
- mov es,di
-
- mov ax,NILPTR
- mov di,offset fielder ;fill fields with NILPTR
- mov cx,DICTIONARY*2
- rep stosw
-
- mov ax,NILPTR
- mov di,seg rsibfield ;now do far segment
- mov es,di
- mov di,offset fielder_two
- mov cx,DICTIONARY*3
- rep stosw
-
- mov dictsize,00h
- xor bx,bx
- .WHILE dictsize < STATICSIZE ;fill first 256 bytes of ctr
- mov ax,dictsize ;(character) field with usual
- mov ctrfield[bx],ax ;dos characters 00-255.
- inc bx ;These will comprise the non-
- inc bx ;erasable potion of the dictionary.
- inc dictsize
- .ENDW
-
- mov curptr,NILPTR
- mov curlen,00h
- ret
- initialize endp
-
-
- ;-----------------------------------------------------------------------
- ; UPDATE procedure. Updates the tree by adding and deleting appropriate
- ; leaves. Also determines which codewords will be re-used and which
- ; leaves will be deleted from the tree.
- ; Input: from main program - trieptr, prevptr, match, etc.
- ;------------------------------------------------------------------------
-
- update proc near
- LOCAL update_flag:BYTE
-
- cmp prevptr,NILPTR
- je update_out
-
- mov update_flag,00h
- mov mindex,00h
-
-
- .WHILE 1 ;loop indefinitely
-
- mov ax,curlen ;Don't loop more than current
- cmp mindex,ax ;match length. Leaf nodes are
- jae update_out ;added to previous pointer.
-
- cmp mindex,MAXINCREMENT ;MAXINCREMENT is number of pre-
- jae update_out ;fixes to add. Read on...
-
- mov ax,prevlen ;Maximum match length is 100
- add ax,mindex ;in this code. This routine
- cmp ax,MAXMATCH ;adds prefixes one-at-a-time
- jae update_out ;per loop. Variable mindex is
- ;the loop counter.
-
- mov bx,mstart ;mstart from main program
- add bx,mindex ;index into match array
- xor ah,ah ;make into word
- mov al,match[bx] ;get letter from array
- mov mval,ax ;record in mval
-
- cmp update_flag,01h ;skip the child search after first
- je skip_this ;leaf added since the rest will be
- ;new and without children
-
- child prevptr,mval ;see if mval is child of prevptr
- mov trieptr,ax ;record result in trieptr
-
- ;If no match was found, we will
- ;walk down the tree from prevptr and
- ;add leaves consisting of the
- ;matched prefixes. The number
- ;added is regulated by MAXINCREMENT.
-
- .IF trieptr == NILPTR ;if not child, then update tree
- mov update_flag,01h
- skip_this:
-
- .IF dictsize <= MAXPTR
- mov ax,dictsize ;get free unused pointer
- mov trieptr,ax ;move pointer into trieptr
- inc dictsize ;increment to next available pointer
- .ELSE ;No more unused pointers?
- mov cx,bigplace ;Get saved place. Will loop around
- bigtop: ;from 256 to 8190 progressively and
- cmp cx,prevptr ;re-use pointers. Don't use prevptr.
- je around ;We'll get the oldest leaf without
- cmp cx,MAXPTR ;children, delete the leaf,update the
- ja bigtop2 ;tree with recycled pointer.Over max?
- bigtop1: ;Then reset to 256 and start over.
- mov bx,cx ;put into bx
- shl bx,1 ;word index
- cmp lcfield[bx],NILPTR ;see if pointer has children
- ;NILPTR means no children for this
- je bigtopout ;node and we can erase & re-use it,
- around: ;else keep trying
- inc cx ;go to next pointer
- jmp bigtop ;repeat
-
- bigtop2: ;jump-out from above loop
- mov cx,STATICSIZE ;reset to 256
- jmp bigtop1
- bigtopout:
-
- mov trieptr,cx ;put leaf node in trieptr
- inc cx ;advance for progressive usage
- mov bigplace,cx ;Record in bigplace. We'll start
- ;here next time we look for a pointer
- ;to re-use.
- delete_leaf ;delete leaf node and,if necessary,
- ;reconnect siblings
- .ENDIF
-
- add_leaf ;add leaf to tree
-
- .ENDIF
- mov ax,trieptr
- mov prevptr,ax
- inc mindex
- .ENDW
-
- update_out:
- ret
- update endp
-
-
- ;----------------------------------------------------------------------
- ; READ_FILE procedure. Reads the file into a buffer for processing
- ; by the program.
- ; Input: None
- ; Output: eof flag
- ;-----------------------------------------------------------------------
-
- read_file proc near USES dx cx bx ax
- mov bytes_read,0000h
- mov ah,3fh
- mov bx,file_handle
- mov cx, 8000
- push ds
- ASSUME ds:seg inbuffer
- mov dx,es
- mov ds,dx
- mov dx,offset inbuffer
- int 21h
- ASSUME ds:@data
- pop ds
- mov bytes_read,ax ;Number of bytes read will be in ax. Less
- .IF ax < 8000 ;than requested bytes indicates eof.
- mov end_flag,01h
- .ENDIF
- mov si,offset inbuffer
- mov incount,0000h
- ret
- read_file endp
-
- ;---------------------------------------------------------------------
- ; WRITE_FILE procedure. Writes the output buffer to disk.
- ; Input: none
- ;---------------------------------------------------------------------
-
- write_file proc near USES ax bx cx dx
- mov cx,outcount
- add word ptr master_size[0],cx ;keeps track of bytes written
- adc word ptr master_size[2],0000h ;to file in variable master_size
- mov ah,40h ;This variable will be referenced
- mov bx,file_handle2 ;later by adjustment routine in
- push ds ;the decoder. Encoder doesn't
- mov dx,es ;use this variable so no harm done
- mov ds,dx ;there.
- ASSUME ds:seg outbuffer
- mov dx, offset outbuffer
- int 21h
- ASSUME ds:@data
- pop ds
- mov di,offset outbuffer
- mov outcount,0000h
- ret
- write_file endp
-
- ;--------------------------------------------------------------------------
- ; CMDLINE procedure. Parses the command line for files.
- ;-------------------------------------------------------------------------
-
- cmdline proc near C USES ax bx es si,arg1:WORD,arg2:WORD,arg3:WORD
- LOCAL go_around_flag:BYTE
-
- mov go_around_flag,00h
-
- mov ah,51h ;get psp
- int 21h
- mov es,bx ;move psp seg into es
- xor cx,cx
- xor bx,bx
- .IF es:[80h]==cl ;check for command line arguments
- ret ;value at 80h of the psp is number of bytes
- .ENDIF ;in command line argument.
-
- mov bx,82h ;find start of command line
- mov si,arg1 ;buffer for command line
- @@:
- mov al,es:[bx] ;get letter from dta (command line)
- .IF al==13 || al==00
- ret
- .ENDIF
- .IF al < 33 && go_around_flag==0
- mov si,arg2 ;buffer for output file
- inc go_around_flag
- mov filelen,cx
- xor cx,cx
- jmp skp
- .ENDIF
- .IF al < 33 && go_around_flag==1
- mov si,arg3 ;buffer for parameter line
- mov filelen2,cx
- mov paraflag,01h
- inc go_around_flag
- .ENDIF
- mov [si],al
- inc si
- skp:
- inc bx
- inc cx ;byte count of file name
- jmp @B
- cmdline endp
-
- ;--------------------------------------------------------------------------
- ; |
- ; ADJUSTMENT FOR DECOMPRESSION FINAL OUTPUT |
- ; |
- ;--------------------------------------------------------------------------
-
- adjust proc near PRIVATE
- ;NOTES:
- ;This decompression routine may add on an extra byte or two
- ;on the end. This is usually the letter coded as 00 since the
- ;encoder has to flush the final byte to the file, whether it is
- ;full or not. These extra bytes are removed by this routine. The
- ;decoder never generates fewer bytes than what the final size
- ;of the file should be. The encoder writes the original file
- ;size to the file at bytes 5-8. The decoder keeps track of the
- ;number of bytes generated and compares it with this figure.
- ;This routine then subtacts the extra byte(s) off the end. Don't
- ;worry, they're never needed and are really extra bytes. This
- ;is one way to do things and it works without fail.
-
- mov dx,word ptr [master_size][2] ;load high word of byte count
- mov ax,word ptr [master_size][0] ;load low word of byte count
- add ax,[outcount] ;add the last count
- adc dx,0000h ;add the carry flag to high word
-
- cmp dx,word ptr [init_size][2] ;see if high word of byte count=
- je nxt ;high word of header info.
- cmp dx,word ptr [init_size][2] ;see if high word of byte count is
- jb lving ;lower than high word of header info
- sub ax,word ptr [init_size][0] ;If so , leave without adjusting
- sbb dx,word ptr [init_size][2] ;otherwise, if byte count is greater
- ;than header info, subtact the two
- sub [outcount],ax ;and take the difference off the end
- jmp short lving ;so that the file size will be correct
- nxt:
- cmp ax,word ptr [init_size] ;compare low words
- jbe lving ;if expanded file is smaller, leave
- sub ax,word ptr [init_size] ;without adjusting, otherwise, subtract
- sub [outcount],ax ;the two and take the difference off
- ;the end again
- lving:
- ret
- adjust endp
-
-
- ;---------------------------------------------------------------------------
- ; |
- ; OPENS SOURCE FILE |
- ; Input: Nothing |
- ; Output: Carry flag set if error |
- ;---------------------------------------------------------------------------
-
-
- open_source_file proc near
-
- mov ah,3dh
- mov al,00h
- mov dx,offset filename ;open input file
- int 21h
- .IF CARRY? ;check to see if carry flag is set
- print error
- stc
- jmp @F
- .ENDIF
- mov file_handle,ax
- @@:
- ret
- open_source_file endp
-
- ;---------------------------------------------------------------------------
- ; |
- ; OPENS OUTPUT FILE |
- ; Input: Nothing |
- ; Output: Carry flag set if error |
- ;---------------------------------------------------------------------------
-
- open_second_file proc near
- mov ah,3ch
- mov cx,00h ;create/truncate output file
- mov dx,offset tempfile
- int 21h
- .IF CARRY? ;check to see if carry flag is set
- print error
- stc
- jmp @F
- .ENDIF
- mov file_handle2,ax
- @@:
- ret
- open_second_file endp
-
-
- ;--------------------------------------------------------------------------
- ; CAPS procedure |
- ; Capitalizes any string |
- ; Input: near pointer to string,length of buffer |
- ;--------------------------------------------------------------------------
-
- caps proc near C PUBLIC USES si cx ax,arg1:WORD,arg2:WORD
- mov si,arg1
- xor cx,cx
- mov cx,arg2
- cap_loop:
- mov al,[si]
- cmp al,61h ;below a?
- jb capon
- cmp al,7ah ;above z?
- ja capon
- AND al,0dfh ;clear the 6th bit
- mov [si],al
- capon:
- inc si
- loop cap_loop
- ret
- caps endp
-
-
- ;--------------------------------------------------------------------------
- ; CONVERT procedure |
- ; Converts any Hex number to displayable decimal string. |
- ; Input: Doubleword in dx:ax |
- ; Output: To screen automatically (from variable: ascii_string) |
- ;--------------------------------------------------------------------------
-
-
- convert proc near
-
- mov di,ds
- mov es,di ;load ds into es
-
- mov si,10 ;load base 10 into si
- xor bx,bx ;zero bx
- xor cx,cx ;zero cx
- mov bx,ax ;move low word into bx
- mov cx,dx ;load high word into cx
- mov dx,-1 ;set end-of-string flag
- push dx ;save it as last string byte
- hex_loop_1:
- xchg ax,cx ;load ax with high word
- xor dx,dx ;zero dx
- div si ;divde by 10, leave rem. in dx
- xchg cx,ax ;save high word back in cx
- xchg ax,bx ;load ax with low word
- div si ;divide by ten
- xchg bx,ax ;save low word back in bx
- add dl,30h ;convert remainder to ascii
- push dx ;save as digit
- or bx,bx ;see if low word is zero
- jne hex_loop_1
- or cx,cx ;see if high word is zero
- jne hex_loop_1
-
- xor cx,cx
- xor dx,dx
- lea di, ascii_string ;point di to string
-
- hex_loop_2:
- pop ax ;pop first digit
- cmp ax,-1 ;see if end-of-string
- je write_it
- stosb ;write it to string
- inc cx ;increase digit counter
- jmp hex_loop_2
-
- write_it:
- cmp cx,1 ;see if string is 1 digit long
- jne write ;if not, write string
- lea di, ascii_string ;point di to string
- mov al,[di] ;move only digit to al
- mov ah,"0" ;move "0" into ah
- mov [di+1],al ;write "0" as first letter
- mov [di],ah ;write digit as second
- mov cx,2 ;adjust counter to two
- write:
- mov [count],cx
- writestring ascii_string,count
-
- ret
- convert endp
-
- END